home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Disc to the Future 2
/
Disc to the Future Part II Programmer's Reference (Wayzata Technology)(6013)(1992).bin
/
MAC
/
MPW_TOOL
/
TOOLS
/
TOOLS_WI
/
ICON_8
/
ICONT_FO
/
TLEX.C
< prev
next >
Wrap
Text File
|
1990-03-02
|
19KB
|
743 lines
/*
* tlex.c -- the lexical analyzer.
*/
#include "::h:config.h"
#include "tproto.h"
#include "trans.h"
#include "token.h"
#include "tlex.h"
#include "tree.h"
#include <ctype.h>
#if MACINTOSH
#if MPW
#include <CursorCtl.h>
#define CURSORINTERVAL 100
#endif /* MPW */
#endif /* MACINTOSH */
/*
* Prototypes.
*/
hidden int ctlesc Params((noargs));
hidden struct toktab *findres Params((noargs));
hidden struct toktab *getident Params((int ac,int *cc));
hidden struct toktab *getnum Params((int ac,int *cc));
hidden struct toktab *getopr Params((int ac,int *cc));
hidden struct toktab *getstring Params((int ac,int *cc));
hidden int hexesc Params((noargs));
hidden int nextchar Params((noargs));
hidden int octesc Params((int ac));
hidden int setfilenm Params((int c));
hidden int setlineno Params((noargs));
#define isletter(s) (isupper(c) | islower(c))
#if !EBCDIC
#define tonum(c) (isdigit(c) ? (c - '0') : ((c & 037) + 9))
/*
* esctab - translates single-character escapes in string literals.
*/
static char esctab[] = {
000, 001, 002, 003, 004, 005, 006, 007, /* NUL-BEL */
010, 011, 012, 013, 014, 015, 016, 017, /* BS -SI */
020, 021, 022, 023, 024, 025, 026, 027, /* DLE-ETB */
030, 031, 032, 033, 034, 035, 036, 037, /* CAN-US */
' ', '!', '"', '#', '$', '%', '&', '\'', /* !"#$%&' */
'(', ')', '*', '+', ',', '-', '.', '/', /* ()*+,-./ */
000, 001, 002, 003, 004, 005, 006, 007, /* 01234567 */
010, 011, ':', ';', '<', '=', '>', '?', /* 89:;<=>? */
'@', 'A', '\b', 'C', 0177, 033, 014, 'G', /* @ABCDEFG */
'H', 'I', 'J', 'K', '\n', 'M', '\n', 'O', /* HIJKLMNO */
'P', 'Q', '\r', 'S', '\t', 'U', 013, 'W', /* PQRSTUVW */
'X', 'Y', 'Z', '[', '\\', ']', '^', '_', /* XYZ[\]^_ */
'`', 'a', '\b', 'c', 0177, 033, 014, 'g', /* `abcdefg */
'h', 'i', 'j', 'k', '\n', 'm', '\n', 'o', /* hijklmno */
'p', 'q', '\r', 's', '\t', 'u', 013, 'w', /* pqrstuvw */
'x', 'y', 'z', '{', '|', '}', '~', 0177, /* xyz{|}~ */
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
};
#else /* !EBCDIC */
/*
* This is the EBCDIC table for handling escapes.
*/
static char esctab[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
' ', 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 'a', 0x16, 'c', 0x07, 0x27, 0x0c, 'g',
'h', 'i', 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 'j', 'k', 0x25, 'm', 0x15, 'o', 'p',
'q', 0x0d, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 's', 0x05, 'u', 0x0b, 'w', 'x',
'y', 'z', 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 'A', 0x16, 'C', 0x07, 0x27, 0x0c, 'G',
'H', 'I', 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 'J', 'K', 0x25, 'M', 0x15, 'O', 'P',
'Q', 0x0d, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 'S', 0x05, 'U', 0x0b, 'W', 'X',
'Y', 'Z', 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
#endif /* !EBCDIC */
struct node tok_loc =
{0, NULL, 0, 0}; /* "model" node containing location of current token */
/*
* yylex - find the next token in the input stream, and return its token
* type and value to the parser.
*
* Variables of interest:
*
* cc - character following last token.
* comflag - set if in a comment.
* nlflag - set if a newline was between the last token and the current token
* lastend - set if the last token was an Ender.
* lastval - when a semicolon is inserted and returned, lastval gets the
* token value that would have been returned if the semicolon hadn't
* been inserted.
*/
static struct toktab *lasttok = NULL;
static int lastend = 0;
static int eofflag = 0;
static int cc = '\n';
int yylex()
{
register struct toktab *t;
register int c;
int nlflag;
int comflag;
static nodeptr lastval;
static struct node semi_loc;
if (lasttok != NULL) {
/*
* A semicolon was inserted and returned on the last call to yylex,
* instead of going to the input, return lasttok and set the
* appropriate variables.
*/
yylval = lastval;
tok_loc = *lastval;
t = lasttok;
goto ret;
}
nlflag = 0;
comflag = 0;
loop:
c = cc;
/*
* Remember where a semicolon will go if we insert one.
*/
semi_loc.n_file = tok_loc.n_file;
semi_loc.n_line = in_line;
semi_loc.n_col = incol;
/*
* Skip whitespace and comments and process #line directives.
*/
while (c == Comment || isspace(c)) {
if (c == '\n') {
nlflag++;
c = NextChar;
if (c == Comment) {
/*
* Check for #line directive at start of line.
*/
if (('l' == (c = NextChar)) &&
('i' == (c = NextChar)) &&
('n' == (c = NextChar)) &&
('e' == (c = NextChar))) {
c = setlineno();
while ((c == ' ') || (c == '\t'))
c = NextChar;
if (c != EOF && c != '\n')
c = setfilenm(c);
}
while (c != EOF && c != '\n')
c = NextChar;
}
}
else {
if (c == Comment) {
while (c != EOF && c != '\n')
c = NextChar;
}
else {
c = NextChar;
}
}
}
/*
* A token is the next thing in the input. Set token location to
* the current line and column.
*/
tok_loc.n_line = in_line;
tok_loc.n_col = incol;
if (c == EOF) {
/*
* End of file has been reached. Set eofflag, return T_Eof, and
* set cc to EOF so that any subsequent scans also return T_Eof.
*/
if (eofflag++) {
eofflag = 0;
cc = '\n';
yylval = NULL;
return 0;
}
cc = EOF;
t = T_Eof;
yylval = NULL;
goto ret;
}
/*
* Look at current input character to determine what class of token
* is next and take the appropriate action. Note that the various
* token gathering routines write a value into cc.
*/
if (isalpha(c) || (c == '_')) { /* gather ident or reserved word */
if ((t = getident(c, &cc)) == NULL)
goto loop;
}
else if (isdigit(c)) { /* gather numeric literal */
if ((t = getnum(c, &cc)) == NULL)
goto loop;
}
else if (c == '"' || c == '\'') { /* gather string or cset literal */
if ((t = getstring(c, &cc)) == NULL)
goto loop;
}
else { /* gather longest legal operator */
if ((t = getopr(c, &cc)) == NULL)
goto loop;
yylval = OpNode(t->t_type);
}
if (nlflag && lastend && (t->t_flags & Beginner)) {
/*
* A newline was encountered between the current token and the last,
* the last token was an Ender, and the current token is a Beginner.
* Return a semicolon and save the current token in lastval.
*/
lastval = yylval;
lasttok = t;
tok_loc = semi_loc;
yylval = OpNode(SEMICOL);
return SEMICOL;
}
ret:
/*
* Clear lasttok, set lastend if the token being returned is an
* Ender, and return the token.
*/
lasttok = 0;
lastend = t->t_flags & Ender;
return (t->t_type);
}
#ifdef MultipleRuns
/*
* yylexinit - initialize variables for multiple runs
*/
novalue yylexinit()
{
lasttok = NULL;
lastend = 0;
eofflag = 0;
cc = '\n';
}
#endif /* MultipleRuns */
/*
* getident - gather an identifier beginning with ac. The character
* following identifier goes in cc.
*/
static struct toktab *getident(ac, cc)
int ac;
int *cc;
{
register int c;
register char *p;
register struct toktab *t;
c = ac;
p = strf;
/*
* Copy characters into string space until a non-alphanumeric character
* is found.
*/
do {
if (p >= stre)
tsyserr("out of string space");
*p++ = c;
c = NextChar;
} while (isalnum(c) || (c == '_'));
if (p >= stre)
tsyserr("out of string space");
*p++ = 0;
*cc = c;
/*
* If the identifier is a reserved word, make a ResNode for it and return
* the token value. Otherwise, install it with putid, make an
* IdNode for it, and return.
*/
if ((t = findres()) != NULL) {
yylval = ResNode(t->t_type);
return t;
}
else {
yylval = IdNode(putid((int)(p-strf)));
return (struct toktab *)T_Ident;
}
}
/*
* findres - if the string just copied into the string space by getident
* is a reserved word, return a pointer to its entry in the token table.
* Return NULL if the string isn't a reserved word.
*/
static struct toktab *findres()
{
register struct toktab *t;
register char c, *p;
p = strf;
c = *p;
if (!islower(c))
return NULL;
/*
* Point t at first reserved word that starts with c (if any).
*/
if ((t = restab[c - 'a']) == NULL)
return NULL;
/*
* Search through reserved words, stopping when a match is found
* or when the current reserved word doesn't start with c.
*/
while (t->t_word[0] == c) {
if (strcmp(t->t_word, p) == 0)
return t;
t++;
}
return NULL;
}
/*
* getnum - gather a numeric literal starting with ac and put the
* character following the literal into *cc.
*/
static struct toktab *getnum(ac, cc)
int ac;
int *cc;
{
register int c, r, state;
char *p;
int realflag;
c = ac;
r = tonum(c);
p = strf;
state = 0;
realflag = 0;
for (;;) {
if (p >= stre)
tsyserr("out of string space");
*p++ = c;
c = NextChar;
switch (state) {
case 0: /* integer part */
if (isdigit(c)) { r = r * 10 + tonum(c); continue; }
if (c == '.') { state = 1; realflag++; continue; }
if (c == 'e' || c == 'E') { state = 2; realflag++; continue; }
if (c == 'r' || c == 'R') {
state = 5;
if (r < 2 || r > 36)
tfatal("invalid radix for integer literal", (char *)NULL);
continue;
}
break;
case 1: /* fractional part */
if (isdigit(c)) continue;
if (c == 'e' || c == 'E') { state = 2; continue; }
break;
case 2: /* optional exponent sign */
if (c == '+' || c == '-') { state = 3; continue; }
case 3: /* first digit after e, e+, or e- */
if (isdigit(c)) { state = 4; continue; }
tfatal("invalid real literal", (char *)NULL);
break;
case 4: /* remaining digits after e */
if (isdigit(c)) continue;
break;
case 5: /* first digit after r */
if ((isdigit(c) || isletter(c)) && tonum(c) < r)
{ state = 6; continue; }
tfatal("invalid integer literal", (char *)NULL);
break;
case 6: /* remaining digits after r */
if (isdigit(c) || isletter(c)) {
if (tonum(c) >= r) { /* illegal digit for radix r */
tfatal("invalid digit in integer literal", (char *)NULL);
r = tonum('z'); /* prevent more messages */
}
continue;
}
break;
}
break;
}
if (p >= stre)
tsyserr("out of string space");
*p++ = 0;
*cc = c;
if (realflag) {
yylval = RealNode(putid((int)(p-strf)));
return T_Real;
}
yylval = IntNode(putid((int)(p-strf)));
return T_Int;
}
/*
* getstring - gather a string literal starting with ac and place the
* character following the literal in *cc.
*/
static struct toktab *getstring(ac, cc)
int ac;
int *cc;
{
register int c, sc;
register char *p;
char *lc;
int len;
sc = c = ac;
p = strf;
lc = 0;
while ((c = NextChar) != sc && c != '\n' && c != EOF) {
contin:
if (c == '_')
lc = p;
else if (!isspace(c))
lc = 0;
if (c == Escape) {
c = NextChar;
#ifdef VarTran
*p++ = Escape;
#else /* VarTran */
if (isoctal(c))
c = octesc(c);
else if (c == 'x')
c = hexesc();
else if (c == '^')
c = ctlesc();
else
c = esctab[c];
#endif /* VarTran */
if (c == EOF)
goto noquote;
}
if (p >= stre)
tsyserr("out of string space");
*p++ = c;
}
if (p >= stre)
tsyserr("out of string space");
*p++ = 0;
if (c == sc)
*cc = ' ';
else {
if (c == '\n' && lc) {
p = lc;
while ((c = NextChar) != EOF && isspace(c)) ;
if (c != EOF)
goto contin;
}
noquote:
tfatal("unclosed quote", (char *)NULL);
*cc = c;
}
if (ac == '"') { /* a string literal */
len = p - strf;
yylval = StrNode(putid((int)len), len);
return T_String;
}
else { /* a cset literal */
len = p - strf;
yylval = CsetNode(putid((int)len), len);
return T_Cset;
}
}
#ifndef VarTran
/*
* ctlesc - translate a control escape -- backslash followed by
* caret and one character.
*/
static int ctlesc()
{
register int c;
c = NextChar;
if (c == EOF)
return EOF;
return (c & 037);
}
/*
* octesc - translate an octal escape -- backslash followed by
* one, two, or three octal digits.
*/
static int octesc(ac)
int ac;
{
register int c, nc, i;
c = 0;
nc = ac;
i = 1;
do {
c = (c << 3) | (nc - '0');
nc = NextChar;
if (nc == EOF)
return EOF;
} while (isoctal(nc) && i++ < 3);
PushChar(nc);
return (c & 0377);
}
/*
* hexesc - translate a hexadecimal escape -- backslash-x
* followed by one or two hexadecimal digits.
*/
static int hexesc()
{
register int c, nc, i;
c = 0;
i = 0;
while (i++ < 2) {
nc = NextChar;
if (nc == EOF)
return EOF;
if (nc >= 'a' && nc <= 'f')
nc -= 'a' - 10;
else if (nc >= 'A' && nc <= 'F')
nc -= 'A' - 10;
else if (isdigit(nc))
nc -= '0';
else {
PushChar(nc);
break;
}
c = (c << 4) | nc;
}
return c;
}
#endif /* VarTran */
/*
* getopr - find the longest legal operator and return a pointer
* to its entry in the token table.
*/
static struct toktab *getopr(ac, cc)
int ac;
int *cc;
{
register struct optab *state;
register char c, i;
state = state0;
c = ac;
for (;;) {
while ((i = state->o_input) && c != i)
state++;
switch (state->o_action) {
case A_Goto:
state = (struct optab *) state->o_val;
c = NextChar;
continue;
case A_Error:
tfatal("invalid character", (char *)NULL);
*cc = ' ';
return NULL;
case A_Return:
*cc = c;
return (struct toktab *)(state->o_val);
case A_Immret:
*cc = ' ';
return (struct toktab *)(state->o_val);
}
}
}
/*
* setlineno - set line number from #line comment, return following char.
*/
static int setlineno()
{
register int c;
while ((c = NextChar) == ' ' || c == '\t')
;
if (c < '0' || c > '9') {
tfatal("no line number in #line directive", "");
while (c != EOF && c != '\n')
c = NextChar;
return c;
}
in_line = 0;
while (c >= '0' && c <= '9') {
in_line = in_line * 10 + (c - '0');
c = NextChar;
}
return c;
}
/*
* setfilenm - set file name from #line comment, return following char.
*
* Assigning to comfile here does not provide the fine-grained
* control over filenames required by a real macro processor.
* setloc() in tcode.c ought to be restored to its earlier form and
* the initialization of filenames fixed.
*/
static int setfilenm(c)
register int c;
{
extern char *comfile;
register char *p;
while (c == ' ' || c == '\t')
c = NextChar;
if (c != '"') {
tfatal("'\"' missing from file name in #line directive", "");
while (c != EOF && c != '\n')
c = NextChar;
return c;
}
p = strf;
while ((c = NextChar) != '"' && c != EOF && c != '\n') {
if (p >= stre)
tsyserr("out of string space");
*p++ = c;
}
*p++ = '\0';
if (c == '"') {
tok_loc.n_file = putid((int)(p-strf));
return NextChar;
}
else {
tfatal("'\"' missing from file name in #line directive", "");
return c;
}
}
/*
* nextchar - return the next character in the input.
*/
static int nextchar()
{
register int c;
#if MACINTOSH
#if MPW
{
static short cursorcount = CURSORINTERVAL;
if (--cursorcount == 0) {
RotateCursor(0);
cursorcount = CURSORINTERVAL;
}
}
#endif /* MPW */
#endif /* MACINTOSH */
if (c = peekc) {
peekc = 0;
return c;
}
c = getc(srcfile);
switch (c) {
case EOF:
if (incol) {
c = '\n';
in_line++;
incol = 0;
peekc = EOF;
break;
}
else {
in_line = 0;
incol = 0;
break;
}
case '\n':
in_line++;
incol = 0;
break;
case '\t':
incol = (incol | 7) + 1;
break;
case '\b':
if (incol)
incol--;
break;
default:
incol++;
}
return c;
}